library(qualtRics)
library(magrittr)
library(dplyr)
library(emmeans)
library(tidyr)
library(ggplot2)
library(estimatr)
library(stm)
library(expss)
library(labelled)
library(vader)
library(hrbrthemes)
library(stringr)


# import data
study1 <- qualtRics::read_survey("data/study1.csv")

# Build one cleaned free-text string per respondent from every column whose
# name contains "acc".
# Missing answers are blanked *before* the columns are pasted together.
# Pasting first and deleting the literal "NA" afterwards also removed those
# two letters from inside real words (e.g. "NATO" became "TO").
text_combined <- study1 %>%
  select(contains("acc")) %>%
  mutate(across(everything(), ~ replace_na(as.character(.x), ""))) %>%
  unite("text", everything(), sep = " ") %>%
  mutate(
    text = gsub("[^A-Za-z ]", "", text), # keep ASCII letters and spaces only
    text = stringr::str_trim(text)       # drop leading/trailing whitespace
  ) %>%
  pull(text)

# rename prompt variables
# Build a lookup with one row per column of study1 (same order as the
# columns): the column name and its variable label as returned by
# labelled::var_label().
prompts <- tibble(name = colnames(study1), # tibble of var names, labels
                  label = labelled::var_label((study1))) %>%
  # Keep only names that start with "acc" and do not contain "DO_";
  # every other name is set to NA.
  mutate(name = ifelse(!str_detect(name, "^acc") | str_detect(name, "DO_"),
                       NA, name), # NA if irrelevant
         # mutate() evaluates its arguments in order, so `name` here is the
         # value just computed above: rows already set to NA give an NA test
         # and therefore an NA label as well.
         label = ifelse(!str_detect(name, "^acc") | str_detect(name, "DO_"),
                        NA, label),
         # New short name = characters -5..-2 of the label followed by
         # characters 4..10 of the name when the name contains "2p2c",
         # otherwise characters 4..6 of the name.
         # NOTE(review): presumably this yields names such as pro1_4p or
         # pro1_2p2c_p, which the recode step below refers to -- confirm
         # against the survey export.
         label = ifelse(str_detect(name, "2p2c"), # shorten labels
                        str_c(str_sub(label, -5, -2), str_sub(name, 4, 10)),
                        str_c(str_sub(label, -5, -2), str_sub(name, 4, 6))))

# match() gives, for each column of study1, its position in prompts$name, or 0
# when there is no match; zero indices are dropped when subsetting, so only
# the retained prompt columns are renamed.
i1 <- match(colnames(study1), prompts$name, nomatch = 0) # find prompt variables
colnames(study1)[i1] <- prompts$label[i1] # replace var names with short labels

# Select, element by element, the response that belongs to the information
# condition in `info`:
#   "4p"   -> value from `pro`
#   "4c"   -> value from `con`
#   "2p2c" -> value from `mixed`
# Values are converted to character. The result is NA where `info` is missing
# or is none of the three codes.
response <- function(info, pro, con, mixed) {
  # Build the fallbacks from the innermost case outwards.
  when_mixed <- if_else(info == "2p2c", as.character(mixed), NA_character_)
  when_con <- if_else(info == "4c", as.character(con), when_mixed)
  if_else(info == "4p", as.character(pro), when_con)
}

# recode variables, then keep only respondents whose raw generic_flag is
# missing (generic_flag is recoded below to 1 when the raw value is NA)
# NOTE(review): the pre_* tertiles are cut on the full sample, i.e. before the
# generic_flag filter is applied, whereas pk_scale_tertiles further down is
# cut after it -- confirm this asymmetry is intended.
study1 <- study1 %>%
  transmute(
    id,
    likert,
    introduction,
    # prompt text followed by the response given under the assigned condition
    pro1, pro1_r = response(con, pro1_4p, pro1_4c, pro1_2p2c_p),
    pro2, pro2_r = response(con, pro2_4p, pro2_4c, pro2_2p2c_p),
    pro3, pro3_r = response(con, pro3_4p, pro3_4c, pro3_2p2c_p),
    pro4, pro4_r = response(con, pro4_4p, pro4_4c, pro4_2p2c_p),
    con1, con1_r = response(con, con1_4p, con1_4c, con1_2p2c_c),
    con2, con2_r = response(con, con2_4p, con2_4c, con2_2p2c_c),
    con3, con3_r = response(con, con3_4p, con3_4c, con3_2p2c_c),
    con4, con4_r = response(con, con4_4p, con4_4c, con4_2p2c_c),
    # information condition as a label and as a numeric score
    information = plyr::mapvalues(con,
                                  c("4p", "2p2c", "4c"),
                                  c("pro", "mixed", "con")),
    information_num = as.numeric(plyr::mapvalues(con,
                                                 c("4p", "2p2c", "4c"),
                                                 c(1, .5, 0))),
    motivation = relevel(as.factor(condition), "accuracy"),
    pre_attitudes = pre_measure_1,
    pre_certainty = pre_certainty_19,
    post_attitudes = post_outcome_1,
    post_certainty = post_certainty_19,
    # polsoph items are scored 1/0; the closed items compare against one
    # answer code, the open-text items are matched with a regular expression
    polsoph1 = as.numeric(polsoph1 == 2),
    polsoph2 = as.numeric(polsoph2 == 1),
    pid3 = pid3,
    ideology = ideology,
    polsoph3 = as.numeric(grepl("2/3|thirds|67", polsoph3,
                                ignore.case = TRUE)),
    polsoph4 = as.numeric(grepl("court|judicial|scotus", polsoph4,
                                ignore.case = TRUE)),
    polsoph5 = as.numeric(grepl("vice|vp", polsoph5, ignore.case = TRUE)),
    timer = log(`Q34_Page Submit`),
    generic_flag = as.numeric(is.na(generic_flag)),
    pre_attitudes_tertiles = factor(ntile(pre_attitudes, 3), 1:3,
                                    c("Tertile 1", "Tertile 2", "Tertile 3")),
    pre_certainty_tertiles = factor(ntile(pre_certainty, 3), 1:3,
                                    c("Tertile 1", "Tertile 2", "Tertile 3")),
    # row-aligned with study1: text_combined was built before any filtering
    text = text_combined
  ) %>%
  filter(generic_flag == 1)

# create spreadsheet for MTurk validation: one row per respondent holding an
# HTML snippet with every prompt/response pair they answered (html) and the
# number of such pairs (n)
# str_c() returns NA whenever the prompt or the response is NA, so unanswered
# tasks come out as NA without a separate ifelse() guard.
mturk <- study1 %>%
  transmute(
    id,
    pro1_task = str_c("<b>Prompt</b>: ", pro1,
                      "<br><b>Response</b>: ", pro1_r),
    pro2_task = str_c("<b>Prompt</b>: ", pro2,
                      "<br><b>Response</b>: ", pro2_r),
    pro3_task = str_c("<b>Prompt</b>: ", pro3,
                      "<br><b>Response</b>: ", pro3_r),
    pro4_task = str_c("<b>Prompt</b>: ", pro4,
                      "<br><b>Response</b>: ", pro4_r),
    con1_task = str_c("<b>Prompt</b>: ", con1,
                      "<br><b>Response</b>: ", con1_r),
    con2_task = str_c("<b>Prompt</b>: ", con2,
                      "<br><b>Response</b>: ", con2_r),
    con3_task = str_c("<b>Prompt</b>: ", con3,
                      "<br><b>Response</b>: ", con3_r),
    con4_task = str_c("<b>Prompt</b>: ", con4,
                      "<br><b>Response</b>: ", con4_r)
  ) %>%
  pivot_longer(cols = pro1_task:con4_task) %>%
  # Drop only rows without an id or without a task. na.omit() on the whole
  # frame also discarded every task of a respondent who had an NA in an
  # unrelated carried column (information, motivation, likert, introduction).
  drop_na(id, value) %>%
  group_by(id) %>%
  summarize(html = paste(value, collapse = "<br><br>"),
            n = n())

#write_csv(mturk, "data/mturk.csv")

# create pk scale (sum of the polsoph columns divided by 5), cut it into
# tertiles, and make "pro" the reference level of information
study1 <- study1 %>%
  mutate(
    pk_scale = rowSums(select(., contains("polsoph"))) / 5,
    pk_scale_tertiles = factor(
      ntile(pk_scale, 3),
      levels = 1:3,
      labels = c("Tertile 1", "Tertile 2", "Tertile 3")
    ),
    information = relevel(as.factor(information), "pro")
  )

# Shared ggplot2 theme settings: light grey panel without grid lines,
# size-15 text for axes, legend and facet strips, no legend title,
# legend placed at the bottom, and a 0.25 cm margin on every side.
theme_params <- local({
  text_15 <- element_text(size = 15)
  no_element <- element_blank()

  theme(
    panel.background = element_rect(fill = "gray95"),
    panel.grid.minor = no_element,
    panel.grid.major = no_element,
    legend.text = text_15,
    axis.text.x = text_15,
    legend.title = no_element,
    axis.text.y = text_15,
    axis.title.y = text_15,
    axis.title.x = text_15,
    legend.position = "bottom",
    plot.margin = unit(rep(.25, 4), "cm"),
    strip.text.x = text_15
  )
})
